source("./load_files.R")rafael <-
load_cdr_rafael() %>%
filter(rich == "rich")
thais <-
load_cdr_thais() %>%
filter(rich == "rich")
carol <-
load_cdr_carol() %>%
filter(rich == "rich")
cdr <-
full_join(rafael, thais) %>%
full_join(carol)
glimpse(cdr)## Rows: 4,271
## Columns: 46
## Groups: expgroup, cycle, time [14]
## $ cdr3 <chr> "GGVNWNDQ", "APAGREFDY", "PLTGLHY", "VQESWGLIDS", "DPGHGDDY…
## $ cycle <chr> "R4", "R4", "R4", "R4", "R4", "R4", "R4", "R4", "R4", "R4",…
## $ time <chr> "final", "final", "final", "final", "final", "final", "fina…
## $ expgroup <chr> "rafael", "rafael", "rafael", "rafael", "rafael", "rafael",…
## $ cdrp <dbl> 0.011217659, 0.004559435, 0.004197576, 0.003709065, 0.00351…
## $ quantity <int> 620, 252, 232, 205, 194, 186, 178, 337, 155, 153, 303, 151,…
## $ fcp <dbl> 252.05518, 204.89647, 188.63485, 166.68165, 157.73776, 151.…
## $ fcq <dbl> 310.0, 252.0, 232.0, 205.0, 194.0, 186.0, 178.0, 168.5, 155…
## $ length <int> 8, 9, 7, 10, 8, 15, 14, 7, 10, 9, 7, 7, 9, 6, 11, 10, 20, 1…
## $ MW <dbl> 888.8806, 1025.0717, 799.9134, 1133.2079, 874.8078, 1833.93…
## $ AV <dbl> 0.1250, 0.2222, 0.1429, 0.1000, 0.1250, 0.1333, 0.2143, 0.2…
## $ IP <dbl> 4.0500, 4.3704, 7.1654, 4.0500, 4.0500, 4.7241, 4.5365, 4.0…
## $ flex <dbl> 0.7795, 0.7601, 0.7171, 0.7599, 0.8070, 0.7617, 0.7513, 0.7…
## $ gravy <dbl> -1.4375, -0.9333, 0.0571, -0.0900, -2.1750, -1.2400, -0.992…
## $ SSF_Helix <dbl> 0.2500, 0.2222, 0.4286, 0.4000, 0.1250, 0.2000, 0.2857, 0.2…
## $ SSF_Turn <dbl> 0.5000, 0.2222, 0.2857, 0.3000, 0.3750, 0.1333, 0.2143, 0.2…
## $ SSF_Sheet <dbl> 0.0000, 0.3333, 0.2857, 0.2000, 0.0000, 0.2667, 0.1429, 0.2…
## $ n_A <int> 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_C <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_D <int> 1, 1, 0, 1, 3, 2, 2, 0, 1, 2, 1, 2, 1, 0, 3, 1, 1, 4, 0, 1,…
## $ n_E <int> 0, 1, 0, 1, 0, 2, 1, 2, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,…
## $ n_F <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1,…
## $ n_G <int> 2, 1, 1, 1, 2, 0, 2, 1, 3, 1, 2, 0, 2, 0, 0, 2, 5, 1, 2, 0,…
## $ n_H <int> 0, 0, 1, 0, 1, 2, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,…
## $ n_I <int> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_K <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_L <int> 0, 0, 2, 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 2, 1, 1, 0, 0, 1,…
## $ n_M <int> 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,…
## $ n_N <int> 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,…
## $ n_P <int> 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,…
## $ n_Q <int> 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ n_R <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 1, 0, 0,…
## $ n_S <int> 0, 0, 0, 2, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 3, 4, 3, 1, 3,…
## $ n_T <int> 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0,…
## $ n_V <int> 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 2, 0, 0,…
## $ n_W <int> 1, 0, 0, 1, 0, 0, 2, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,…
## $ n_Y <int> 0, 1, 1, 0, 1, 1, 0, 1, 4, 0, 0, 0, 1, 0, 1, 2, 4, 0, 2, 1,…
## $ aliphatic <int> 3, 4, 4, 4, 3, 4, 5, 2, 3, 3, 2, 1, 4, 2, 3, 4, 8, 3, 2, 2,…
## $ aromatic <int> 1, 2, 1, 1, 1, 2, 3, 2, 4, 1, 1, 2, 2, 1, 2, 2, 4, 1, 2, 2,…
## $ neutral <int> 3, 0, 1, 3, 0, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4, 4, 3, 3,…
## $ positive <int> 0, 1, 1, 0, 1, 3, 2, 0, 0, 2, 2, 1, 0, 0, 1, 0, 3, 1, 1, 1,…
## $ negative <int> 1, 2, 0, 2, 3, 4, 3, 2, 2, 2, 1, 2, 2, 1, 3, 1, 1, 5, 0, 2,…
## $ invalid <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ file <chr> "brigido_rafael_Nestor_resultado_nestor_VH_FinalRound_rafaC…
## $ threshold <dbl> 0.293833, 0.293833, 0.293833, 0.293833, 0.293833, 0.293833,…
## $ rich <fct> rich, rich, rich, rich, rich, rich, rich, rich, rich, rich,…
library(tidytext)
set.seed(42)
pca_rec <-
recipe( ~ ., data = cdr) %>%
step_rm(file, time, threshold, rich) %>%
step_rm(quantity, cdrp, fcp, fcq) %>%
update_role(all_nominal(), new_role = "id") %>%
step_string2factor(all_nominal()) %>%
step_nzv(all_numeric()) %>%
step_corr(all_numeric()) %>%
step_normalize(all_predictors()) %>%
step_pca(all_predictors())
pca_prep <- prep(pca_rec)
pca_prep## Data Recipe
##
## Inputs:
##
## role #variables
## id 6
## predictor 40
##
## Training data contained 4271 data points and no missing data.
##
## Operations:
##
## Variables removed file, time, threshold, rich [trained]
## Variables removed quantity, cdrp, fcp, fcq [trained]
## Factor variables from cdr3, cycle, expgroup [trained]
## Sparse, unbalanced variable filter removed n_C, invalid [trained]
## Correlation filter removed MW [trained]
## Centering and scaling for length, AV, IP, flex, gravy, ... [trained]
## PCA extraction with length, AV, IP, flex, gravy, SSF_Helix, ... [trained]
juice(pca_prep) %>%
ggplot(aes(PC1, PC2, label = cdr3)) +
geom_point(aes(color = expgroup)) +
geom_text(check_overlap = T, hjust = "inward", family = "IBMPlexSans") +
labs(color = NULL)library(plotly)
juice(pca_prep) %>%
plot_ly(
x = .$PC1,
y = .$PC2,
z = .$PC3,
type = "scatter3d",
mode = "markers",
color = .$expgroup
)# clean data
rm(pca_prep, pca_rec)library(tidytext)
set.seed(42)
kpca_poly_prep <-
recipe( ~ ., data = cdr) %>%
step_rm(file, time, threshold, rich) %>%
step_rm(quantity, cdrp, fcp, fcq) %>%
update_role(all_nominal(), new_role = "id") %>%
step_string2factor(all_nominal()) %>%
step_nzv(all_numeric()) %>%
step_corr(all_numeric()) %>%
step_normalize(all_predictors()) %>%
step_kpca_poly(all_predictors())
kpca_poly_prep <- prep(kpca_poly_prep)
kpca_poly_prep## Data Recipe
##
## Inputs:
##
## role #variables
## id 6
## predictor 40
##
## Training data contained 4271 data points and no missing data.
##
## Operations:
##
## Variables removed file, time, threshold, rich [trained]
## Variables removed quantity, cdrp, fcp, fcq [trained]
## Factor variables from cdr3, cycle, expgroup [trained]
## Sparse, unbalanced variable filter removed n_C, invalid [trained]
## Correlation filter removed MW [trained]
## Centering and scaling for length, AV, IP, flex, gravy, ... [trained]
## Polynomial kernel PCA (polydot) extraction with length, AV, IP, flex, ... [trained]
juice(kpca_poly_prep)## # A tibble: 4,271 x 8
## cdr3 cycle expgroup kPC1 kPC2 kPC3 kPC4 kPC5
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GGVNWNDQ R4 rafael 213. 181. 20.9 346. -361.
## 2 APAGREFDY R4 rafael 187. 136. -106. -199. -26.4
## 3 PLTGLHY R4 rafael -10.0 173. -16.8 -320. 154.
## 4 VQESWGLIDS R4 rafael 176. 129. -143. -205. -112.
## 5 DPGHGDDY R4 rafael 144. 220. -41.5 635. -337.
## 6 DREVTEAHYHPMFDS R4 rafael -956. -55.2 -94.3 -216. -378.
## 7 DIRWEMGTGHWFDP R4 rafael -151. 45.8 -186. -184. -199.
## 8 ETWGPEY R4 rafael 196. 41.9 -74.9 90.5 -168.
## 9 EDGYSYGYGY R4 rafael 140. -51.0 33.9 -246. 15.4
## 10 DLHWGSVDH R4 rafael 163. 82.7 -54.1 -162. -257.
## # … with 4,261 more rows
juice(kpca_poly_prep) %>%
ggplot(aes(kPC1, kPC2, label = cdr3)) +
geom_point(aes(color = expgroup)) +
geom_text(check_overlap = T, hjust = "inward", family = "IBMPlexSans") +
labs(color = NULL)library(plotly)
juice(kpca_poly_prep) %>%
plot_ly(
x = .$kPC1,
y = .$kPC2,
z = .$kPC3,
type = "scatter3d",
mode = "markers",
color = .$expgroup
)# clean data
rm(kpca_poly_prep, kpca_rec)library(tidytext)
set.seed(42)
kpca_rbf_prep <-
recipe( ~ ., data = cdr) %>%
step_rm(file, time, threshold, rich) %>%
step_rm(quantity, cdrp, fcp, fcq) %>%
update_role(all_nominal(), new_role = "id") %>%
step_string2factor(all_nominal()) %>%
step_nzv(all_numeric()) %>%
step_corr(all_numeric()) %>%
step_normalize(all_predictors()) %>%
step_kpca_rbf(all_predictors())
kpca_rbf_prep <- prep(kpca_rbf_prep)
kpca_rbf_prep## Data Recipe
##
## Inputs:
##
## role #variables
## id 6
## predictor 40
##
## Training data contained 4271 data points and no missing data.
##
## Operations:
##
## Variables removed file, time, threshold, rich [trained]
## Variables removed quantity, cdrp, fcp, fcq [trained]
## Factor variables from cdr3, cycle, expgroup [trained]
## Sparse, unbalanced variable filter removed n_C, invalid [trained]
## Correlation filter removed MW [trained]
## Centering and scaling for length, AV, IP, flex, gravy, ... [trained]
## RBF kernel PCA (rbfdot) extraction with length, AV, IP, flex, ... [trained]
juice(kpca_rbf_prep)## # A tibble: 4,271 x 8
## cdr3 cycle expgroup kPC1 kPC2 kPC3 kPC4 kPC5
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GGVNWNDQ R4 rafael 0.962 -1.63 -0.412 -0.751 -0.137
## 2 APAGREFDY R4 rafael -0.115 -1.05 0.255 -1.24 -3.19
## 3 PLTGLHY R4 rafael 0.686 -1.55 1.12 -0.290 0.298
## 4 VQESWGLIDS R4 rafael 0.161 -1.25 -0.252 -0.599 -0.118
## 5 DPGHGDDY R4 rafael -0.698 -1.17 -0.439 -0.783 -1.32
## 6 DREVTEAHYHPMFDS R4 rafael 0.865 -1.40 -0.482 -0.509 0.492
## 7 DIRWEMGTGHWFDP R4 rafael 0.865 -1.42 -0.457 -0.530 0.481
## 8 ETWGPEY R4 rafael 0.818 -1.73 -0.551 -2.24 -6.78
## 9 EDGYSYGYGY R4 rafael 0.905 14.7 0.244 3.10 0.928
## 10 DLHWGSVDH R4 rafael 0.696 -2.04 3.16 -0.0690 0.577
## # … with 4,261 more rows
juice(kpca_rbf_prep) %>%
ggplot(aes(kPC1, kPC2, label = cdr3)) +
geom_point(aes(color = expgroup)) +
geom_text(check_overlap = T, hjust = "inward", family = "IBMPlexSans") +
labs(color = NULL)library(plotly)
juice(kpca_rbf_prep) %>%
plot_ly(
x = .$kPC1,
y = .$kPC2,
z = .$kPC3,
type = "scatter3d",
mode = "markers",
color = .$expgroup
)# clean data
rm(kpca_rbf_prep, kpca_rec)library(tidytext)
set.seed(42)
ica_rec <-
recipe( ~ ., data = cdr) %>%
step_rm(file, time, threshold, rich) %>%
step_rm(quantity, cdrp, fcp, fcq) %>%
update_role(all_nominal(), new_role = "id") %>%
step_string2factor(all_nominal()) %>%
step_nzv(all_numeric()) %>%
step_corr(all_numeric()) %>%
step_normalize(all_predictors()) %>%
step_ica(all_predictors())
ica_prep <- prep(ica_rec)
ica_prep## Data Recipe
##
## Inputs:
##
## role #variables
## id 6
## predictor 40
##
## Training data contained 4271 data points and no missing data.
##
## Operations:
##
## Variables removed file, time, threshold, rich [trained]
## Variables removed quantity, cdrp, fcp, fcq [trained]
## Factor variables from cdr3, cycle, expgroup [trained]
## Sparse, unbalanced variable filter removed n_C, invalid [trained]
## Correlation filter removed MW [trained]
## Centering and scaling for length, AV, IP, flex, gravy, ... [trained]
## ICA extraction with length, AV, IP, flex, gravy, SSF_Helix, ... [trained]
juice(ica_prep)## # A tibble: 4,271 x 8
## cdr3 cycle expgroup IC1 IC2 IC3 IC4 IC5
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GGVNWNDQ R4 rafael -0.259 -1.39 -0.135 -0.945 0.797
## 2 APAGREFDY R4 rafael -0.308 -0.536 0.407 -0.0471 -1.10
## 3 PLTGLHY R4 rafael 0.865 -0.635 -1.22 -0.125 -1.01
## 4 VQESWGLIDS R4 rafael -0.666 -0.251 -0.108 -0.652 -0.429
## 5 DPGHGDDY R4 rafael -0.624 -1.72 0.994 -0.452 0.156
## 6 DREVTEAHYHPMFDS R4 rafael 0.384 0.176 2.41 0.338 -1.33
## 7 DIRWEMGTGHWFDP R4 rafael 0.315 0.414 1.37 0.286 -0.563
## 8 ETWGPEY R4 rafael -1.08 -1.26 0.222 0.136 -0.563
## 9 EDGYSYGYGY R4 rafael -0.930 -0.338 -0.691 0.519 0.932
## 10 DLHWGSVDH R4 rafael 0.550 -0.951 0.298 0.355 -0.739
## # … with 4,261 more rows
juice(ica_prep) %>%
ggplot(aes(IC1, IC2, label = cdr3)) +
geom_point(aes(color = expgroup)) +
geom_text(check_overlap = T, hjust = "inward", family = "IBMPlexSans") +
labs(color = NULL)library(plotly)
juice(ica_prep) %>%
plot_ly(
x = .$IC1,
y = .$IC2,
z = .$IC3,
type = "scatter3d",
mode = "markers",
color = .$expgroup
)# clean data
rm(ica_prep, ica_rec)library(embed)
set.seed(42)
umap_rec <-
recipe( ~ ., data = cdr) %>%
step_rm(file, time, threshold, rich) %>%
step_rm(quantity, cdrp, fcp, fcq) %>%
update_role(all_nominal(), new_role = "id") %>%
step_string2factor(all_nominal()) %>%
step_nzv(all_numeric()) %>%
step_corr(all_numeric()) %>%
step_normalize(all_predictors()) %>%
step_umap(all_predictors(), num_comp = 3,
options = list(
verbose = T,
n_threads = parallel::detectCores()
))
umap_prep <- prep(umap_rec)
umap_prep## Data Recipe
##
## Inputs:
##
## role #variables
## id 6
## predictor 40
##
## Training data contained 4271 data points and no missing data.
##
## Operations:
##
## Variables removed file, time, threshold, rich [trained]
## Variables removed quantity, cdrp, fcp, fcq [trained]
## Factor variables from cdr3, cycle, expgroup [trained]
## Sparse, unbalanced variable filter removed n_C, invalid [trained]
## Correlation filter removed MW [trained]
## Centering and scaling for length, AV, IP, flex, gravy, ... [trained]
## UMAP embedding for length, AV, IP, flex, gravy, ... [trained]
juice(umap_prep) %>%
ggplot(aes(umap_1, umap_2, label = cdr3)) +
geom_point(aes(color = expgroup)) +
geom_text(check_overlap = T, hjust = "inward", family = "IBMPlexSans") +
labs(color = NULL)library(plotly)
juice(umap_prep) %>%
plot_ly(
x = .$umap_1,
y = .$umap_2,
z = .$umap_3,
type = "scatter3d",
mode = "markers",
color = .$expgroup
)# clean data
rm(umap_prep, umap_rec)library(Rtsne)
tsne_df <-
cdr %>%
ungroup() %>%
select(cdr3, expgroup, cycle, !where(is.character)) %>%
select(!c(which(apply(., 2, var)==0))) %>%
select(!c("quantity", "cdrp", "fcp", "fcq", "rich", "threshold")) %>%
mutate(across(where(is.character), as.factor))
set.seed(42)
tsne_out <-
tsne_df %>%
unique() %>%
Rtsne(
X = .,
dims = 3,
perplexity = 30,
theta = 0.5,
max_iter = 1E3,
verbose = T,
pca_center = T,
pca_scale = T,
normalize = T,
partial_pca = T,
eta = 200.0,
exaggeration_factor = 12.0,
num_threads = parallel::detectCores()
)## Performing PCA
## Read the 4271 x 50 data matrix successfully!
## OpenMP is working. 8 threads.
## Using no_dims = 3, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 2.39 seconds (sparsity = 0.032983)!
## Learning embedding...
## Iteration 50: error is 86.293961 (50 iterations in 27.57 seconds)
## Iteration 100: error is 85.775593 (50 iterations in 9.25 seconds)
## Iteration 150: error is 85.535944 (50 iterations in 4.76 seconds)
## Iteration 200: error is 85.491334 (50 iterations in 4.92 seconds)
## Iteration 250: error is 85.488876 (50 iterations in 5.84 seconds)
## Iteration 300: error is 2.804065 (50 iterations in 3.87 seconds)
## Iteration 350: error is 2.410835 (50 iterations in 3.89 seconds)
## Iteration 400: error is 2.238190 (50 iterations in 4.06 seconds)
## Iteration 450: error is 2.138289 (50 iterations in 4.48 seconds)
## Iteration 500: error is 2.074880 (50 iterations in 5.23 seconds)
## Iteration 550: error is 2.032770 (50 iterations in 5.29 seconds)
## Iteration 600: error is 2.002685 (50 iterations in 5.71 seconds)
## Iteration 650: error is 1.980768 (50 iterations in 5.65 seconds)
## Iteration 700: error is 1.964674 (50 iterations in 5.53 seconds)
## Iteration 750: error is 1.951831 (50 iterations in 5.38 seconds)
## Iteration 800: error is 1.942696 (50 iterations in 5.50 seconds)
## Iteration 850: error is 1.936658 (50 iterations in 5.35 seconds)
## Iteration 900: error is 1.931697 (50 iterations in 5.39 seconds)
## Iteration 950: error is 1.926850 (50 iterations in 6.20 seconds)
## Iteration 1000: error is 1.922362 (50 iterations in 5.35 seconds)
## Fitting performed in 129.22 seconds.
cdr %>%
ungroup() %>%
select(cdr3, expgroup, cycle, rich, !where(is.character)) %>%
select(!c(which(apply(., 2, var)==0))) %>%
select(!c("quantity", "cdrp", "fcp", "fcq", "threshold")) -> a
tsne_out %>%
.$Y %>%
as_tibble() %>%
ggplot() +
geom_point(aes(V1, V2, color = a$expgroup))tsne_out %>%
.$Y %>%
as_tibble() %>%
plot_ly(
x = .$V1,
y = .$V2,
z = .$V3,
type = "scatter3d",
mode = "markers",
color = a$expgroup
) %>%
layout(title = "Comparison Between Experiments")library(Rtsne)
tsne_df <-
cdr %>%
ungroup() %>%
select(!where(is.character)) %>%
# select(cdr3, expgroup, cycle, !where(is.character)) %>%
select(!c(which(apply(., 2, var)==0))) %>%
select(!c("quantity", "cdrp", "fcp", "fcq", "rich", "threshold")) %>%
mutate(across(where(is.character), as.factor))
set.seed(42)
tsne_out <-
tsne_df %>%
unique() %>%
Rtsne(
X = .,
dims = 3,
perplexity = 30,
theta = 0.5,
max_iter = 1E3,
verbose = T,
pca_center = T,
pca_scale = T,
normalize = T,
# partial_pca = T,
eta = 200.0,
exaggeration_factor = 12.0,
num_threads = parallel::detectCores()
)## Performing PCA
## Read the 3849 x 34 data matrix successfully!
## OpenMP is working. 8 threads.
## Using no_dims = 3, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 1.67 seconds (sparsity = 0.034859)!
## Learning embedding...
## Iteration 50: error is 85.209017 (50 iterations in 11.08 seconds)
## Iteration 100: error is 85.074387 (50 iterations in 9.01 seconds)
## Iteration 150: error is 85.065576 (50 iterations in 6.01 seconds)
## Iteration 200: error is 85.062826 (50 iterations in 5.59 seconds)
## Iteration 250: error is 85.061685 (50 iterations in 5.05 seconds)
## Iteration 300: error is 3.107545 (50 iterations in 5.87 seconds)
## Iteration 350: error is 2.661810 (50 iterations in 4.22 seconds)
## Iteration 400: error is 2.481345 (50 iterations in 4.26 seconds)
## Iteration 450: error is 2.381366 (50 iterations in 5.19 seconds)
## Iteration 500: error is 2.318935 (50 iterations in 4.33 seconds)
## Iteration 550: error is 2.278227 (50 iterations in 4.46 seconds)
## Iteration 600: error is 2.250068 (50 iterations in 4.42 seconds)
## Iteration 650: error is 2.229087 (50 iterations in 4.49 seconds)
## Iteration 700: error is 2.214116 (50 iterations in 4.65 seconds)
## Iteration 750: error is 2.202790 (50 iterations in 4.33 seconds)
## Iteration 800: error is 2.194687 (50 iterations in 4.51 seconds)
## Iteration 850: error is 2.188096 (50 iterations in 5.57 seconds)
## Iteration 900: error is 2.182072 (50 iterations in 4.58 seconds)
## Iteration 950: error is 2.177503 (50 iterations in 4.35 seconds)
## Iteration 1000: error is 2.172849 (50 iterations in 4.51 seconds)
## Fitting performed in 106.49 seconds.
cdr %>%
ungroup() %>%
select(!where(is.character)) %>%
# select(cdr3, expgroup, cycle, rich, !where(is.character)) %>%
select(!c(which(apply(., 2, var)==0))) %>%
select(!c("quantity", "cdrp", "fcp", "fcq", "threshold")) -> a
tsne_out %>%
.$Y %>%
as_tibble() %>%
ggplot() +
geom_point(aes(V1, V2, color = a$expgroup))tsne_out %>%
.$Y %>%
as_tibble() %>%
plot_ly(
x = .$V1,
y = .$V2,
z = .$V3,
type = "scatter3d",
mode = "markers",
color = I("blue"),
alpha = .1
) %>%
layout(title = "All experiments together")